3  PBMCs: Data Visualization

3.1 Package Set Up

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(colorRamp2)
Warning: package 'colorRamp2' was built under R version 4.4.0
library(ComplexHeatmap)
Loading required package: grid
========================================
ComplexHeatmap version 2.18.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference

If you use it in published research, please cite either one:
- Gu, Z. Complex Heatmap Visualization. iMeta 2022.
- Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
    genomic data. Bioinformatics 2016.


The new InteractiveComplexHeatmap package can directly export static 
complex heatmaps into an interactive Shiny app with zero effort. Have a try!

This message can be suppressed by:
  suppressPackageStartupMessages(library(ComplexHeatmap))
========================================
library(cowplot)

Attaching package: 'cowplot'

The following object is masked from 'package:lubridate':

    stamp
library(ggplot2)
library(ggridges)
library(vegan)
Loading required package: permute
Loading required package: lattice
library(scales)

Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor
library(colorspace)

3.2 Loading in dataframes

PT106_test_fisher_categorized <- readRDS("./PT106_test_fisher_categorized.rds")
PT108_test_fisher_categorized <- readRDS("./PT108_test_fisher_categorized.rds")
PT111_test_fisher_categorized <- readRDS("./PT111_test_fisher_categorized.rds")
PT113_test_fisher_categorized <- readRDS("./PT113_test_fisher_categorized.rds")
PT114_test_fisher_categorized <- readRDS( "./PT114_test_fisher_categorized.rds")
PT116_test_fisher_categorized <- readRDS("./PT116_test_fisher_categorized.rds")
PT117_test_fisher_categorized <- readRDS("./PT117_test_fisher_categorized.rds")
PT118_test_fisher_categorized <- readRDS("./PT118_test_fisher_categorized.rds")
PT119_test_fisher_categorized <- readRDS("./PT119_test_fisher_categorized.rds")
PT122_test_fisher_categorized <- readRDS("./PT122_test_fisher_categorized.rds")

3.3 Combine Dataframe for Visualization

fisher_clusters <- bind_rows(
  PT122_test_fisher_categorized  %>% mutate(patient_id = "PT122"),
  PT119_test_fisher_categorized  %>% mutate(patient_id = "PT119"),
  PT118_test_fisher_categorized  %>% mutate(patient_id = "PT118"),
  PT117_test_fisher_categorized  %>% mutate(patient_id = "PT117"),
  PT116_test_fisher_categorized  %>% mutate(patient_id = "PT116"),
  PT114_test_fisher_categorized  %>% mutate(patient_id = "PT114"),
  PT113_test_fisher_categorized  %>% mutate(patient_id = "PT113"),
  PT111_test_fisher_categorized  %>% mutate(patient_id = "PT111"),
  PT108_test_fisher_categorized  %>% mutate(patient_id = "PT108"),
  PT106_test_fisher_categorized  %>% mutate(patient_id = "PT106")
)

ex <- fisher_clusters %>% 
  group_by(patient_id, new_category) %>%
  summarize(count=n(), .groups = "drop")

3.4 Plotting Proportion of Postvax Expanded and Postnivo Expanded

library(dplyr)
library(ggplot2)

# Define the relevant categories
sig_categories <- c("post_W14(vaccine)_expanded", "post_W22(booster+Nivo)_expanded")

# Filter only significant clones
sig_clones <- fisher_clusters %>%
  filter(new_category %in% sig_categories)

# Create a new column to label as postvax or postnivo
sig_clones <- sig_clones %>%
  mutate(expansion_type = case_when(
    new_category == "post_W14(vaccine)_expanded" ~ "Postvax",
    new_category == "post_W22(booster+Nivo)_expanded" ~ "Postnivo"
  ))

# Count and calculate proportions per patient
sig_summary <- sig_clones %>%
  group_by(patient_id, expansion_type) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(patient_id) %>%
  mutate(proportion = count / sum(count))

# Set factor levels for patient order
sig_summary$patient_id <- factor(
  sig_summary$patient_id,
  levels = c("PT106","PT108", "PT116", "PT118", "PT119", "PT122", "PT111", "PT113", "PT114", "PT117")
)

# patient groupings
progressive <- c("PT111", "PT114", "PT117", "PT113")
transient_response <- c("PT106", "PT108", "PT116")
stable_disease <- c("PT118", "PT119", "PT122")

# Add response column
sig_summary <- sig_summary %>%
  mutate(response = case_when(
    patient_id %in% progressive ~ "Progressive",
    patient_id %in% transient_response ~ "Transient_Response",
    patient_id %in% stable_disease ~ "Stable_Disease",
    TRUE ~ NA_character_  # For patients not in any group
  ))

# Define color mappings
expansion_colors <- c("Postvax" = "#F8766D", "Postnivo" = "#87CEEB")
response_colors <- c(
  "Progressive" = "#E69F00",
  "Transient_Response" = "#56B4E9",
  "Stable_Disease" = "#009E73"
)


sig_summary$response <- factor(sig_summary$response, levels=c("Transient_Response", "Stable_Disease", "Progressive"))
# Plot with response group as outline color
ggplot(sig_summary, aes(x = patient_id, y = proportion, fill = expansion_type)) +
  geom_bar(stat = "identity", size = 1, position = "stack") +
  labs(
    title = "Proportion of Significant Clones (Postvax vs Postnivo)",
    x = "Patient ID",
    y = "Proportion of Significant Clones",
    fill = "Expansion Type",
    color = "Response Group"
  ) +
  scale_fill_manual(values = expansion_colors) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1) + facet_grid(~response, scales = "free_x", space = "free_x")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

3.5 Plotting Number of Postvax Expanded and Postnivo Expanded

sig_summary <- sig_summary %>% mutate(new_category = case_when(
    expansion_type == "Postvax" ~ "Post_Vaccine_Expanded",
    expansion_type == "Postnivo" ~ "Post_Nivo_Expanded",
    TRUE ~ expansion_type
  ))

sig_summary$new_category <- factor(sig_summary$new_category, levels=c("Post_Vaccine_Expanded","Post_Nivo_Expanded"))

sig_summary %>%
  ggplot(aes(x = patient_id, y = count, fill = response)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +  # set dodge width
  geom_text(aes(label = count), 
            position = position_dodge(width = 0.9), 
            vjust = -0.5,  # move text slightly above the bar
            size = 4) +
  facet_wrap(~new_category, scales = "free_y") +
  labs(
    title = "Number of Postvaccine and Postnivo Expanded Clones",
    x = "Patient ID",
    y = "Clone Count",
    fill = "Response Group"
  ) +
  scale_fill_manual(
    values = c(
      "Progressive" = "#E69F00",
      "Transient_Response" = "#56B4E9",
      "Stable_Disease" = "#009E73"
    )
  ) +
  theme_classic(base_size = 14) +
  theme(
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

3.6 Conducting t-test for the Number of Postvax Expanded vs Postnivo Expanded.

# Combine all patient data
all_counts <- bind_rows(
  PT122_test_fisher_categorized  %>% mutate(patient_id = "PT122"),
  PT119_test_fisher_categorized  %>% mutate(patient_id = "PT119"),
  PT118_test_fisher_categorized  %>% mutate(patient_id = "PT118"),
  PT117_test_fisher_categorized  %>% mutate(patient_id = "PT117"),
  PT116_test_fisher_categorized  %>% mutate(patient_id = "PT116"),
  PT114_test_fisher_categorized  %>% mutate(patient_id = "PT114"),
  PT113_test_fisher_categorized  %>% mutate(patient_id = "PT113"),
  PT111_test_fisher_categorized  %>% mutate(patient_id = "PT111"),
  PT108_test_fisher_categorized  %>% mutate(patient_id = "PT108"),
  PT106_test_fisher_categorized  %>% mutate(patient_id = "PT106")
)

# Count clonotypes by category and patient
# Postvax Expanded vs Postnivo Expanded across response groups
clonotype_counts <- all_counts %>%
  group_by(patient_id, new_category) %>%
  summarise(count = n(), .groups = "drop")

# Only interested in postvax and postnivo comparison
paired_data <- clonotype_counts %>%
  filter(new_category %in% c("post_W14(vaccine)_expanded", "post_W22(booster+Nivo)_expanded")) %>%
  pivot_wider(names_from = new_category, values_from = count) %>%
  replace_na(list(
    `post_W14(vaccine)_expanded` = 0,
    `post_W22(booster+Nivo)_expanded` = 0
  ))

# Paired t-test
t_test_result <- t.test(paired_data$`post_W14(vaccine)_expanded`,
                        paired_data$`post_W22(booster+Nivo)_expanded`,
                        paired = TRUE)

# Reshape for plotting
long_counts_df <- paired_data %>%
  pivot_longer(
    cols = c(`post_W14(vaccine)_expanded`, `post_W22(booster+Nivo)_expanded`),
    names_to = "Condition",
    values_to = "Significant_Clones"
  ) %>%
  mutate(
    Condition = recode(Condition,
      "post_W14(vaccine)_expanded" = "Post_W14(Vaccine)",
      "post_W22(booster+Nivo)_expanded" = "Post_W22(Nivo)"
    ),
    Patient = patient_id
  )

# Annotate patient groups
progressive <- c("PT111", "PT114", "PT117", "PT113")
transient_response <- c("PT106", "PT108", "PT116")
stable_disease <- c("PT118", "PT119", "PT122")

long_counts_df <- long_counts_df %>%
  mutate(response = case_when(
    Patient %in% progressive ~ "Progressive",
    Patient %in% transient_response ~ "Transient_Response",
    Patient %in% stable_disease ~ "Stable_Disease",
    TRUE ~ NA_character_
  ))

# Final Plot (faceted by response)
ggplot(long_counts_df, aes(x = Condition, y = Significant_Clones, group = Patient)) +
  geom_point(aes(color = Patient), size = 3, alpha = 0.6) +
  geom_line(aes(color = Patient), size = 1, alpha = 0.6) +
  labs(
    title = "Paired t-test of Significant Clones",
    subtitle = paste("Paired t-test p-value:", signif(t_test_result$p.value, 3)),
    x = "Condition",
    y = "Number of Significant Clones"
  ) +
  facet_wrap(~response, scales = "free_y") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.7 Comparing the Number of Postvax Clone Counts Separated by Response Groups

# Filter postvax expanded clones
postvax_counts <- clonotype_counts %>%
  filter(new_category == "post_W14(vaccine)_expanded")

# Label patients by response group
postvax_counts <- postvax_counts %>%
  mutate(response_group = case_when(
    patient_id %in% progressive ~ "Progressive",
    patient_id %in% c(transient_response, stable_disease) ~ "Non_Progressive",
    TRUE ~ NA_character_
  ))

# Perform unpaired t-test
unpaired_t <- t.test(count ~ response_group, data = postvax_counts)

# Plot results
ggplot(postvax_counts, aes(x = response_group, y = count, color = response_group)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.2) +
  geom_jitter(width = 0.2, size = 3, alpha = 0.6) +
  labs(
    title = "Comparison of Postvax Expanded Clones",
    subtitle = paste("Unpaired t-test p-value:", signif(unpaired_t$p.value, 3)),
    x = "Response Group",
    y = "Number of Postvax Expanded Clones"
  ) +
  scale_color_manual(values = c(
    "Progressive" = "#E69F00",
    "Non_Progressive" = "#56B4E9"
  )) +
  theme_minimal(base_size = 14)

# Filter postvax expanded clones
postvax_counts <- clonotype_counts %>%
  filter(new_category == "post_W22(booster+Nivo)_expanded")

# Label patients by response group
postvax_counts <- postvax_counts %>%
  mutate(response_group = case_when(
    patient_id %in% progressive ~ "Progressive",
    patient_id %in% c(transient_response, stable_disease) ~ "Non_Progressive",
    TRUE ~ NA_character_
  ))

# Perform unpaired t-test
unpaired_t <- t.test(count ~ response_group, data = postvax_counts)

# Plot results
ggplot(postvax_counts, aes(x = response_group, y = count, color = response_group)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.2) +
  geom_jitter(width = 0.2, size = 3, alpha = 0.6) +
  labs(
    title = "Comparison of Postnivo Expanded Clones",
    subtitle = paste("Unpaired t-test p-value:", signif(unpaired_t$p.value, 3)),
    x = "Response Group",
    y = "Number of Postvax Expanded Clones"
  ) +
  scale_color_manual(values = c(
    "Progressive" = "#E69F00",
    "Non_Progressive" = "#56B4E9"
  )) +
  theme_minimal(base_size = 14)

3.8 Heatmap to Show Difference Between Postnivo - Postvaccine Frequencies (among postvaccine expanded clones)

# color definition function 
col_fun <- colorRamp2(
  breaks = c(-0.01, 0, 0.01),
  colors = c("blue", "white", "red")
)

make_heatmap <- function(df, patient_id) {
  temp <- df[df$new_category == "post_W14(vaccine)_expanded", ]
  temp <- temp[order(-temp$Postvaccine_umi_fraction), ]
  # difference in umi_fraction (Postnivo - Postvaccine)
  temp$diff <- temp$Postnivo_umi_fraction - temp$Postvaccine_umi_fraction
  temp <- temp[order(-temp$diff), ]
  mat <- matrix(temp$diff)
  colnames(mat) <- paste0("Patient:", patient_id)
  Heatmap(mat, cluster_rows = FALSE, cluster_columns = FALSE, show_row_names = FALSE,col=col_fun, heatmap_legend_param = list(title = "Nivo-Vax"))
}

# Create heatmaps for each patient
p1 <- make_heatmap(PT108_test_fisher_categorized, 108)
p2 <- make_heatmap(PT116_test_fisher_categorized, 116)
p3 <- make_heatmap(PT113_test_fisher_categorized, 113)
p4 <- make_heatmap(PT114_test_fisher_categorized, 114)
p5 <- make_heatmap(PT117_test_fisher_categorized, 117)
p6 <- make_heatmap(PT118_test_fisher_categorized, 118)
p7 <- make_heatmap(PT119_test_fisher_categorized, 119)
p8 <- make_heatmap(PT122_test_fisher_categorized, 122)

p1_grid <- grid.grabExpr(draw(p1))
p2_grid <- grid.grabExpr(draw(p2))
p3_grid <- grid.grabExpr(draw(p3))
p4_grid <- grid.grabExpr(draw(p4))
p5_grid <- grid.grabExpr(draw(p5))
p6_grid <- grid.grabExpr(draw(p6))
p7_grid <- grid.grabExpr(draw(p7))
p8_grid <- grid.grabExpr(draw(p8))

plot_grid(p1_grid, p2_grid, p3_grid, p4_grid,p5_grid,p6_grid,p7_grid,p8_grid, ncol=8)

3.9 Plotting Frequency of individual clones

draw_lod_line <- function(yinter) {
  indiv_lod_line <- geom_hline(yintercept = yinter, linetype = "dashed", color = "red")
  return(indiv_lod_line)
}

plot_frequency <- function(input_df, timepoint_order, p_id) {
  # Long format for plotting
  input_df_pivot <- input_df %>%
    dplyr::select(clonotype_id, contains("umi_fraction")) %>%
    tidyr::pivot_longer(
      cols = dplyr::contains("umi_fraction"),
      names_to = "Timepoint",
      values_to = "UMI_Fraction"
    )
  
  input_df_pivot$Timepoint <- factor(input_df_pivot$Timepoint, levels = timepoint_order)
  input_df_pivot$UMI_Fraction <- input_df_pivot$UMI_Fraction
  input_df_plot_data <- input_df_pivot %>%
    left_join(input_df %>% dplyr::select(clonotype_id, new_category), by = "clonotype_id")
  
  # Count clonotypes per category
  clone_counts <- input_df %>%
    count(new_category) %>%
    deframe()  # turns into a named vector
  
  # Create legend labels with counts
  legend_labels <- c(
    "non_expanded" = paste0("non_expanded (n=", clone_counts["non_expanded"], ")"),
    "post_W14(vaccine)_expanded" = paste0("post_W14(vaccine)_expanded (n=", clone_counts["post_W14(vaccine)_expanded"], ")"),
    "post_W22(booster+Nivo)_expanded" = paste0("post_W22(booster+Nivo)_expanded (n=", clone_counts["post_W22(booster+Nivo)_expanded"], ")")
  )
  
  # Plot
  p1 <- ggplot(input_df_plot_data, aes(x = Timepoint, y = UMI_Fraction, group = clonotype_id, color = new_category)) +
    geom_line(data = input_df_plot_data %>% filter(new_category == "non_expanded"), 
              aes(color = "non_expanded"), size = 1, alpha = 0.5) +
    geom_line(data = input_df_plot_data %>% filter(new_category == "post_W14(vaccine)_expanded"), 
              aes(color = "post_W14(vaccine)_expanded"), size = 1, alpha = 0.5) +
    geom_line(data = input_df_plot_data %>% filter(new_category == "post_W22(booster+Nivo)_expanded"), 
              aes(color = "post_W22(booster+Nivo)_expanded"), size = 1, alpha = 0.5) +
    scale_color_manual(
      values = c(
        "non_expanded" = "grey90",
        "post_W14(vaccine)_expanded" = "darkgoldenrod3",
        "post_W22(booster+Nivo)_expanded" = "#0072B2"
      ),
      labels = legend_labels
    ) +
    labs(title = paste("Clonotype UMI Fraction Across Timepoints", p_id),
         x = "Timepoint",
         y = "UMI Fraction",
         color = "Clone Category") +
    annotation_logticks(sides = "l") +
    draw_lod_line(input_df$visualization_LOD[1]) +
    annotate("text", x = as.numeric(length(timepoint_order)) + 0.25,
             y = input_df$visualization_LOD[1]*2, label = "LOD", size = 4, color = "red") +
    theme_classic(base_size = 15) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
      axis.text.y = element_text(size = 8),
      axis.title.x = element_text(size = 8, face = "bold"),
      axis.title.y = element_text(size = 8, face = "bold"),
      legend.text = element_text(size = 8),
      legend.title = element_text(size = 8, face = "bold"),
      plot.title = element_text(size = 10, face = "bold", hjust = 0.5),
      plot.margin = margin(5, 60, 5, 5)
    ) +
    scale_y_log10(labels = label_number(accuracy = 0.00001)) + 
    guides(color = guide_legend(override.aes = list(size = 5, alpha = 1)))

  return(p1)
}

3.10 Plotting Line Frequency Plots For Each Patient

PT106_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "w16_umi_fraction")
PT106_freq <- plot_frequency(PT106_test_fisher_categorized, PT106_timepoint_order, "(PT106)")

PT108_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction", "w40_umi_fraction")
PT108_freq <- plot_frequency(PT108_test_fisher_categorized, PT108_timepoint_order, "(PT108)")

PT111_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction")
PT111_freq <- plot_frequency(PT111_test_fisher_categorized, PT111_timepoint_order, "(PT111)")

PT113_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
PT113_freq <- plot_frequency(PT113_test_fisher_categorized, PT113_timepoint_order, "(PT113)")

PT114_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
PT114_freq <- plot_frequency(PT114_test_fisher_categorized, PT114_timepoint_order, "(PT114)")

PT116_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction", "w40_umi_fraction")
PT116_freq <- plot_frequency(PT116_test_fisher_categorized, PT116_timepoint_order, "(PT116)")

PT117_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
PT117_freq <- plot_frequency(PT117_test_fisher_categorized, PT117_timepoint_order, "(PT117)")

PT118_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
PT118_freq <- plot_frequency(PT118_test_fisher_categorized, PT118_timepoint_order, "(PT118)")

PT119_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
PT119_freq <- plot_frequency(PT119_test_fisher_categorized, PT119_timepoint_order, "(PT119)")

PT122_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
PT122_freq <- plot_frequency(PT122_test_fisher_categorized, PT122_timepoint_order, "(PT122)")

3.11 Plotting Line Plots, Groupping Each Response Groups

# Create plot grids
#grid1 <- plot_grid(PT111_freq, PT113_freq, PT114_freq, PT117_freq, ncol = 2)
#grid2 <- plot_grid(PT118_freq, PT119_freq, PT122_freq, ncol = 2)
#grid3 <- plot_grid(PT106_freq,PT108_freq, PT116_freq, ncol=3)

PT111_freq

PT113_freq

PT114_freq

PT117_freq

PT118_freq

PT119_freq

PT122_freq

PT106_freq

PT108_freq

PT116_freq

3.12 Plotting Frequency Mean of each Clone Category

#if there is LATE timepoint, add to the plot as well.
plot_frequency_mean <- function(input_df, timepoint_order,p_id) {
  
  input_df_vis <- input_df
  
  # Add new subcategories for Post_Nivo_Expanded
input_df_vis <- input_df_vis %>%
  mutate(PostVaccine_UMI = Postvaccine_umi,  # Adjust to match your actual column name
         new_category = case_when(
           new_category == "post_W14(vaccine)_expanded" ~ "Post_Vaccine_Expanded",
           new_category == "post_W22(booster+Nivo)_expanded" ~ "Post_Nivo_Expanded"
         ))

# Update factor levels for consistent plotting
input_df_vis$new_category <- factor(input_df_vis$new_category,
                                        levels = c("Post_Vaccine_Expanded",                                           "Post_Nivo_Expanded"))
  input_df_long <- input_df_vis %>%
    dplyr::select(clonotype_id, contains("umi_fraction")) %>%
    tidyr::pivot_longer(
      cols = dplyr::contains("umi_fraction"),
      names_to = "Timepoint",
      values_to = "UMI_Fraction"
    )
  
  input_df_long$Timepoint <- factor(input_df_long$Timepoint, levels = timepoint_order)
  input_df_long$UMI_Fraction <- (input_df_long$UMI_Fraction)
  
  input_df_plot_data <- input_df_long %>%
    left_join(input_df_vis %>% dplyr::select(clonotype_id,response,new_category), by = "clonotype_id")
  
summary_data <- input_df_plot_data %>%
  filter(new_category %in% c("Post_Vaccine_Expanded", "Post_Nivo_Expanded")) %>%
  group_by(new_category, Timepoint) %>%
  summarise(
    mean_UMI = mean(UMI_Fraction, na.rm = TRUE),
    se_UMI = sd(UMI_Fraction, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
)

# Plot with geom_ribbon and geom_line for mean ± SE
p1 <- ggplot(summary_data, aes(x = Timepoint, y = mean_UMI, color = new_category, group = new_category, fill = new_category)) +
  geom_ribbon(aes(ymin = mean_UMI - se_UMI, ymax = mean_UMI + se_UMI), alpha = 0.3, color = NA) +
  geom_line(size = 1) +
  labs(title = paste("Clusters -", p_id),
       x = "Timepoint", y = "Mean UMI Fraction") +
  annotation_logticks(sides = "l") +
  draw_lod_line(input_df_vis$visualization_LOD[1]) +
  annotate("text", x = as.numeric(length(timepoint_order)) + 0.25,
           y = input_df_vis$visualization_LOD[1]*2, 
           label = "LOD", size = 4, color = "red") +
  theme_classic() +
  scale_y_log10(limits = c(0.0001, 0.1)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("Post_Vaccine_Expanded" = "lightblue", "Post_Nivo_Expanded" = "salmon")) +
  scale_color_manual(values = c("Post_Vaccine_Expanded" = "blue", "Post_Nivo_Expanded" = "red"))
return(p1)
}

3.13 Plotting Mean Frequency Plots for each Patient

two_timepoint_order <- c("Prevaccine_umis_fraction", "Postvaccine_umi_fraction")
p9 <- plot_frequency_mean( PT106_test_fisher_categorized, two_timepoint_order, "PT106")
p10 <- plot_frequency_mean(PT111_test_fisher_categorized, two_timepoint_order, "PT111")

three_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction")
p3 <- plot_frequency_mean( PT122_test_fisher_categorized, three_timepoint_order, "PT122")
p4 <- plot_frequency_mean(PT119_test_fisher_categorized, three_timepoint_order, "PT119")
p5 <- plot_frequency_mean(PT118_test_fisher_categorized, three_timepoint_order, "PT118")


four_timepoint_order <- c("Prevaccine_umi_fraction", "Postvaccine_umi_fraction", "Postnivo_umi_fraction", "w40_umi_fraction")
p1 <- plot_frequency_mean( PT108_test_fisher_categorized, four_timepoint_order, "PT108")
p2 <- plot_frequency_mean(PT116_test_fisher_categorized, four_timepoint_order, "PT116")

p6 <- plot_frequency_mean(PT113_test_fisher_categorized, three_timepoint_order, "PT113")
p7 <- plot_frequency_mean(PT114_test_fisher_categorized, three_timepoint_order, "PT114")
p8 <- plot_frequency_mean(PT117_test_fisher_categorized, three_timepoint_order, "PT117")

3.14 Plotting Shannon Diversity for each Patient

#Load in unfiltered dataframe -> consider all clones for diversity calculation

PT106_merged_df <- readRDS("./PT106_merged_df.rds")
PT108_merged_df <- readRDS("./PT108_merged_df.rds")
PT111_merged_df <- readRDS("./PT111_merged_df.rds")
PT113_merged_df <- readRDS("./PT113_merged_df.rds")
PT114_merged_df <- readRDS( "./PT114_merged_df.rds")
PT116_merged_df <- readRDS("./PT116_merged_df.rds")
PT117_merged_df <- readRDS("./PT117_merged_df.rds")
PT118_merged_df <- readRDS("./PT118_merged_df.rds")
PT119_merged_df <- readRDS("./PT119_merged_df.rds")
PT122_merged_df <- readRDS("./PT122_merged_df.rds")

calculate_diversity <- function(data_list, patient_name) {
  results <- data.frame()
  
  for (timepoint in names(data_list)) {
    umi_vector <- data_list[[timepoint]]
    if (length(umi_vector) > 1 && sum(umi_vector) > 0) {
      diversity_val <- diversity(umi_vector, index = "shannon") / log10(length(umi_vector))
    } else {
      diversity_val <- NA
    }
    results <- rbind(results, data.frame(
      Patient = patient_name,
      Timepoint = timepoint,
      Normalized_Shannon = diversity_val
    ))
  }
  return(results)
}

# Define timepoint lists for each patient
PT106_list <- list(
  Prevaccine_umi = PT106_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT106_merged_df$Postvaccine_umi,
  w16_umi = PT106_merged_df$w16_umi
)

PT108_list <- list(
  Prevaccine_umi = PT108_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT108_merged_df$Postvaccine_umi,
  Postnivo_umi = PT108_merged_df$Postnivo_umi,
  w40_umi = PT108_merged_df$w40_umi
)

PT111_list <- list(
  Prevaccine_umi = PT111_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT111_merged_df$Postvaccine_umi
)

PT113_list <- list(
  Prevaccine_umi = PT113_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT113_merged_df$Postvaccine_umi,
  Postnivo_umi = PT113_merged_df$Postnivo_umi
)

PT114_list <- list(
  Prevaccine_umi = PT114_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT114_merged_df$Postvaccine_umi,
  Postnivo_umi = PT114_merged_df$Postnivo_umi
)

PT116_list <- list(
  Prevaccine_umi = PT116_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT116_merged_df$Postvaccine_umi,
  Postnivo_umi = PT116_merged_df$Postnivo_umi,
  w40_umi = PT116_merged_df$w40_umi
)

PT117_list <- list(
  Prevaccine_umi = PT117_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT117_merged_df$Postvaccine_umi,
  Postnivo_umi = PT117_merged_df$Postnivo_umi
)

PT118_list <- list(
  Prevaccine_umi = PT118_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT118_merged_df$Postvaccine_umi,
  Postnivo_umi = PT118_merged_df$Postnivo_umi
)

PT119_list <- list(
  Prevaccine_umi = PT119_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT119_merged_df$Postvaccine_umi,
  Postnivo_umi = PT119_merged_df$Postnivo_umi
)

PT122_list <- list(
  Prevaccine_umi = PT122_merged_df$Prevaccine_umi,
  Postvaccine_umi = PT122_merged_df$Postvaccine_umi,
  Postnivo_umi = PT122_merged_df$Postnivo_umi
)

# Apply to all patients
all_diversity <- rbind(
  calculate_diversity(PT106_list, "PT106"),
  calculate_diversity(PT108_list, "PT108"),
  calculate_diversity(PT111_list, "PT111"),
  calculate_diversity(PT113_list, "PT113"),
  calculate_diversity(PT114_list, "PT114"),
  calculate_diversity(PT116_list, "PT116"),
  calculate_diversity(PT117_list, "PT117"),
  calculate_diversity(PT118_list, "PT118"),
  calculate_diversity(PT119_list, "PT119"),
  calculate_diversity(PT122_list, "PT122")
)

3.15 Assign Response Groups to Diversity Object and Plot

# Base colors for each response group
base_colors <- c(
  "Progressive" = "#E69F00",
  "Transient_Response" = "#56B4E9",
  "Stable_Disease" = "#009E73"
)

# Define patients per group
progressive <- c("PT111", "PT114", "PT117", "PT113")
transient_response <- c("PT106", "PT108", "PT116")
stable_disease <- c("PT118", "PT119", "PT122")

# Function to generate shades for a vector of patients
generate_shades <- function(patients, base_color) {
  n <- length(patients)
  pal <- scales::gradient_n_pal(c(lighten(base_color, 0.3), base_color, darken(base_color, 0.3)))
  shades <- pal(seq(0, 1, length.out = n))
  names(shades) <- patients
  return(shades)
}

# Generate color mapping
patient_colors <- c(
  generate_shades(progressive, base_colors["Progressive"]),
  generate_shades(transient_response, base_colors["Transient_Response"]),
  generate_shades(stable_disease, base_colors["Stable_Disease"])
)

# Example: assign patient colors to the dataframe
all_diversity <- all_diversity %>%
  mutate(patient_color = patient_colors[Patient])

# Add response column
all_diversity <- all_diversity %>%
  mutate(response = case_when(
    Patient %in% progressive ~ "Progressive",
    Patient %in% transient_response ~ "Transient_Response",
    Patient %in% stable_disease ~ "Stable_Disease",
    TRUE ~ NA_character_  # For patients not in any group
  ))

all_diversity$response  <- gsub("_", " ", all_diversity$response)
all_diversity$Timepoint <- gsub("_", " ", all_diversity$Timepoint)
all_diversity$Timepoint <- factor(all_diversity$Timepoint, levels= c(
  "Prevaccine umi",
  "Postvaccine umi",
  "Postnivo umi",
  "w16 umi",
  "w40 umi"
))

p3 <- ggplot(all_diversity, aes(
    x = Timepoint, y = Normalized_Shannon,
    group = Patient, color = Patient
  )) +
  geom_line(alpha = 0.4) +
  geom_point(size = 2, alpha = 0.8) +
  facet_wrap(~response, scales="free_x") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_color_manual(values = patient_colors) +  # assign colors manually
  labs(y = "Normalized Shannon")

p3

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] colorspace_2.1-1      scales_1.4.0          vegan_2.6-10         
 [4] lattice_0.22-7        permute_0.9-7         ggridges_0.5.6       
 [7] cowplot_1.1.3.9000    ComplexHeatmap_2.18.0 colorRamp2_0.1.0     
[10] lubridate_1.9.4       forcats_1.0.0         stringr_1.5.1        
[13] purrr_1.0.4           readr_2.1.5           tidyr_1.3.1          
[16] tibble_3.2.1          ggplot2_3.5.1         tidyverse_2.0.0      
[19] dplyr_1.1.4          

loaded via a namespace (and not attached):
 [1] gtable_0.3.6        circlize_0.4.16     shape_1.4.6.1      
 [4] rjson_0.2.23        xfun_0.52           htmlwidgets_1.6.4  
 [7] GlobalOptions_0.1.2 tzdb_0.5.0          Cairo_1.6-2        
[10] vctrs_0.6.5         tools_4.3.2         generics_0.1.3     
[13] stats4_4.3.2        parallel_4.3.2      cluster_2.1.8.1    
[16] pkgconfig_2.0.3     Matrix_1.6-5        RColorBrewer_1.1-3 
[19] S4Vectors_0.40.2    lifecycle_1.0.4     compiler_4.3.2     
[22] farver_2.1.2        codetools_0.2-20    clue_0.3-66        
[25] htmltools_0.5.8.1   pillar_1.10.1       crayon_1.5.3       
[28] MASS_7.3-60.0.1     magick_2.8.6        iterators_1.0.14   
[31] foreach_1.5.2       nlme_3.1-168        tidyselect_1.2.1   
[34] digest_0.6.37       stringi_1.8.7       labeling_0.4.3     
[37] splines_4.3.2       fastmap_1.2.0       cli_3.6.4          
[40] magrittr_2.0.3      dichromat_2.0-0.1   withr_3.0.2        
[43] timechange_0.3.0    rmarkdown_2.29      matrixStats_1.5.0  
[46] png_0.1-8           GetoptLong_1.0.5    hms_1.1.3          
[49] evaluate_1.0.3      knitr_1.50          IRanges_2.36.0     
[52] doParallel_1.0.17   mgcv_1.9-3          rlang_1.1.5        
[55] Rcpp_1.1.0          glue_1.8.0          BiocGenerics_0.48.1
[58] rstudioapi_0.17.1   jsonlite_2.0.0      R6_2.6.1